Data Import

# Pages on health.ny.gov (vital statistics, 2019 table 21 and 2014 table 07)
# that the two raw tables below are scraped from.
url <- "https://www.health.ny.gov/statistics/vital_statistics/2019/table21.htm"
url1 <- "https://www.health.ny.gov/statistics/vital_statistics/2014/table07.htm"

# Download `page_url`, parse every HTML table on it without promoting any row
# to a header, keep only the first table and normalise its column names.
read_first_html_table <- function(page_url) {
  page <- read_html(page_url)
  tables <- html_table(page, header = FALSE)
  janitor::clean_names(first(tables))
}

# Raw (uncleaned) tables; header rows are still part of the data at this point.
induced_abortion_age <- read_first_html_table(url)
live_birth_age <- read_first_html_table(url1)

Data cleaning

Cleaning the live birth data

data cleaning induced abortions data

# Tidy the scraped induced-abortion table into one row per county with numeric
# counts per age group.
#
# Steps:
#   1. Keep the first eight columns and give them descriptive names.
#   2. Keep rows 14-70 only (assumed to be the county rows of the scraped
#      table -- TODO confirm if the source page layout changes).
#   3. Rename the NYC counties to their borough names. "New York" is matched
#      exactly so that only the county row itself becomes "Manhattan".
#   4. Strip thousands separators from every count column and convert to
#      numeric in a single step. Doing this column-by-column previously
#      overwrote age_35_39 with the age_30_34 values and left age_30_34 with
#      its commas, which turned values >= 1,000 into NA.
clean_ia_age <-
  induced_abortion_age %>%
  select(1:8) %>%
  purrr::set_names(
    c(
      "borough", "total", "age_less_20", "age_20_24", "age_25_29",
      "age_30_34", "age_35_39", "age_plus_40"
    )
  ) %>%
  slice(14:70) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    borough = ifelse(borough == "New York", "Manhattan", borough),
    borough = str_replace(borough, "Richmond", "Staten_Island"),
    # Every column from `total` to `age_plus_40` is a count column.
    across(total:age_plus_40, ~ as.numeric(str_remove_all(.x, ",")))
  )

Renaming

Regions were defined based on this website: https://statejobs.ny.gov/assets/help/regionMapText.cfm

Recoding the counties in the live birth data into regions and summing the counts within each region.

# Collapse the live-birth counts to one row per region.
#
# Each gsub() swaps any listed county name found in `borough` for its region
# label; the replacements run in the order written. Rows whose `borough`
# matches none of the patterns keep their existing label. The counts are then
# summed within each resulting label.
rename_lb_age <- clean_lb_age %>%
  mutate(
    borough = gsub(
      "Albany|Columbia|Fulton|Greene|Montgomery|Rensselaer|Saratoga|Schenectady|Schoharie|Warren|Washington",
      "Saratoga",
      borough
    ),
    borough = gsub(
      "Franklin|Clinton|Essex|Hamilton",
      "Eastern Adirondacks",
      borough
    ),
    borough = gsub(
      "Herkimer|Jefferson|Lewis|Oneida|Oswego|St Lawrence",
      "Western Adirondacks",
      borough
    ),
    borough = gsub(
      "Broome|Cayuga|Chenango|Cortland|Madison|Onondaga|Otsego|Tioga|Tompkins",
      "Central New York",
      borough
    ),
    borough = gsub(
      "Chemung|Genesee|Livingston|Monroe|Ontario|Schuyler|Seneca|Steuben|Wayne|Yates",
      "Finger Lakes",
      borough
    ),
    borough = gsub(
      "Allegany|Cattaraugus|Chautauqua|Erie|Niagara|Orleans|Wyoming",
      "Western New York",
      borough
    ),
    borough = gsub(
      "Delaware|Dutchess|Orange|Putnam|Sullivan|Ulster",
      "Hudson Valley",
      borough
    ),
    borough = gsub("Rockland|Westchester", "Westchester/Rockland", borough),
    borough = gsub("Suffolk|Nassau", "Long Island", borough)
  ) %>%
  group_by(borough) %>%
  summarize(
    total_rate = sum(total),
    age_less_20 = sum(age_less_20),
    age_20_24 = sum(age_20_24),
    age_25_29 = sum(age_25_29),
    age_30_34 = sum(age_30_34),
    age_35_39 = sum(age_35_39),
    age_plus_40 = sum(age_plus_40)
  )

Recoding the counties in the induced abortion data into regions and summing the counts within each region.

# Collapse the induced-abortion counts to one row per region.
#
# Each gsub() swaps any listed county name found in `borough` for its region
# label; the replacements run in the order written. Rows whose `borough`
# matches none of the patterns (e.g. the boroughs renamed during cleaning)
# keep their existing label. The counts are then summed within each label.
rename_ia_age <- clean_ia_age %>%
  mutate(
    borough = gsub(
      "Albany|Columbia|Fulton|Greene|Montgomery|Rensselaer|Saratoga|Schenectady|Schoharie|Warren|Washington",
      "Saratoga",
      borough
    ),
    borough = gsub(
      "Franklin|Clinton|Essex|Hamilton|Essex/Hamilton|Hamilton/Essex",
      "Eastern Adirondacks",
      borough
    ),
    borough = gsub(
      "Herkimer|Jefferson|Lewis|Oneida|Oswego|St Lawrence",
      "Western Adirondacks",
      borough
    ),
    borough = gsub(
      "Broome|Cayuga|Chenango|Cortland|Madison|Onondaga|Otsego|Tioga|Tompkins",
      "Central New York",
      borough
    ),
    borough = gsub(
      "Chemung|Genesee|Livingston|Monroe|Ontario|Schuyler|Seneca|Steuben|Wayne|Yates",
      "Finger Lakes",
      borough
    ),
    borough = gsub(
      "Allegany|Cattaraugus|Chautauqua|Erie|Niagara|Orleans|Wyoming",
      "Western New York",
      borough
    ),
    borough = gsub(
      "Delaware|Dutchess|Orange|Putnam|Sullivan|Ulster",
      "Hudson Valley",
      borough
    ),
    borough = gsub("Rockland|Westchester", "Westchester/Rockland", borough),
    borough = gsub("Suffolk|Nassau", "Long Island", borough)
  ) %>%
  group_by(borough) %>%
  summarize(
    total_rate = sum(total),
    age_less_20 = sum(age_less_20),
    age_20_24 = sum(age_20_24),
    age_25_29 = sum(age_25_29),
    age_30_34 = sum(age_30_34),
    age_35_39 = sum(age_35_39),
    age_plus_40 = sum(age_plus_40)
  )

Merging

Merging the two regional tables and converting the counts into rates (induced abortions per 1,000 live births).

# Join the regional live-birth and induced-abortion counts on `borough` and
# turn them into rates: induced abortions per 1,000 live births, for each age
# group and overall. Columns shared by both tables get the suffix `_lb`
# (live births) or `_ia` (induced abortions) so the ratios below read
# unambiguously; only the region label and the computed rates are kept.
# A full join keeps regions present in only one table (their rates are NA).
merged_data <-
  rename_lb_age %>%
  full_join(rename_ia_age, by = "borough", suffix = c("_lb", "_ia")) %>%
  mutate(
    age_less_20 = (age_less_20_ia / age_less_20_lb) * 1000,
    age_20_24 = (age_20_24_ia / age_20_24_lb) * 1000,
    age_25_29 = (age_25_29_ia / age_25_29_lb) * 1000,
    age_30_34 = (age_30_34_ia / age_30_34_lb) * 1000,
    age_35_39 = (age_35_39_ia / age_35_39_lb) * 1000,
    age_plus_40 = (age_plus_40_ia / age_plus_40_lb) * 1000,
    total = (total_rate_ia / total_rate_lb) * 1000
  ) %>%
  select(
    borough,
    age_less_20, age_20_24, age_25_29, age_30_34, age_35_39, age_plus_40,
    total
  )

Plotting

plotly bar chart: induced abortion rates (per 1,000 live births) by age group and region

# Interactive grouped bar chart of the abortion rate per age group, with one
# bar colour per region. The overall `total` rate is dropped first so that only
# the age-specific rates are reshaped to long format (one row per
# region/age-group pair) and plotted.
abortion_age_plot <-
  merged_data %>%
  select(-total) %>%
  pivot_longer(
    cols = -borough,
    names_to = "age",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~age,
    y = ~abortion,
    color = ~borough,
    colors = "viridis",
    type = "bar"
  ) %>%
  layout(
    title = "Abortions in NY State by Age",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Render the chart.
abortion_age_plot